* Replication code for "Taxicab Tipping and Sunlight"
* First download the compressed data and extract to a folder under "C:\data\" 
* The taxicab data were obtained from Haggag and Paci (2014), "Default Tips," American Economic Journal: Applied Economics, Vol. 6(3).
* Data source: https://www.aeaweb.org/articles?id=10.1257/app.6.3.1
* The hourly solar data were obtained from the National Solar Radiation Database (http://rredc.nrel.gov/solar/old_data/nsrdb/1991-2010/hourly/siteonthefly.cgi?id=744860)

* Start from an empty session so nothing left in memory interferes with the run
clear all
* Raise the variable limit before loading the data
* NOTE(review): not every Stata edition allows maxvar to be changed - confirm on the target installation
set maxvar 9999
* Raise the matrix-size limit
* NOTE(review): recent Stata releases reportedly ignore matsize - confirm whether this line is still needed
set matsize 11000

* Load the merged analysis file; change the file path if the data were extracted somewhere else
use "C:\data\merged file_hrlydata.dta"

* Weekday indicator: 1 when drf_dow is 1-5, 0 when drf_dow is 0 or 6,
* and missing for any other (or missing) drf_dow value
generate weekday = inrange(drf_dow, 1, 5) if inlist(drf_dow, 0, 1, 2, 3, 4, 5, 6)

* Weekend indicator: the complement of weekday (stays missing where weekday is missing)
generate weekend = 1 - weekday

* Rush-hour indicator: pickup hours 7, 8, 9 (7 AM to 10 AM) and 16, 17, 18 (4 PM to 7 PM)
* on non-weekend days. Every other observation, including those with a missing hour or
* a missing weekend flag, is coded 0, so rushhour is never missing.
generate rushhour = inlist(hour, 7, 8, 9, 16, 17, 18) & weekend == 0

* Quadratic temperature term
generate tavgdegfsq = tavgdegf*tavgdegf

* Attach three-letter month names to the numeric month variable
label define month2  1 "Jan"  2 "Feb"  3 "Mar"  4 "Apr"  5 "May"  6 "Jun" ///
                     7 "Jul"  8 "Aug"  9 "Sep" 10 "Oct" 11 "Nov" 12 "Dec"
label values month month2

* Sample restrictions: remove rides with a missing dependent variable (tip_frac)
* and the passenger-count outlier (129 passengers)
drop if tip_frac==. | passenger_count==129

* Cluster identifier: one id per distinct (pkp_day, hour) combination,
* used for the clustered standard errors
egen hourday = group(pkp_day hour)

* Create continuous lux variable from etrwhm2
* NOTE(review): 0.0079 is presumably the W/m2-per-lux conversion factor - confirm the source
gen etrwhm2lux = etrwhm2 /0.0079

* Generate lux categories (0 = darkest bin, 9 = brightest bin).
* Bins, in lux:   0: <1          1: [1, 40]           2: (40, 200)
*                 3: [200, 400]  4: (400, 1000)       5: [1000, 2000]
*                 6: (2000, 20000]   7: (20000, 110000]
*                 8: (110000, 120000]   9: >120000
* Observations with a missing etrwhm2lux keep lux = . and are not assigned to any bin.
gen lux = .
replace lux=0 if etrwhm2lux <1
replace lux=1 if etrwhm2lux >=1 & etrwhm2lux <=40
replace lux=2 if etrwhm2lux >40 & etrwhm2lux <200
replace lux=3 if etrwhm2lux >=200 & etrwhm2lux <=400
replace lux=4 if etrwhm2lux >400 & etrwhm2lux <1000
replace lux=5 if etrwhm2lux >=1000 & etrwhm2lux <=2000
replace lux=6 if etrwhm2lux >2000 & etrwhm2lux <=20000
replace lux=7 if etrwhm2lux >20000 & etrwhm2lux <=110000
replace lux=8 if etrwhm2lux >110000 & etrwhm2lux <=120000
* Stata treats missing values as larger than any number, so the open-ended top bin
* must exclude them explicitly; otherwise missing readings would be coded as lux = 9.
replace lux=9 if etrwhm2lux >120000 & !missing(etrwhm2lux)

* Summary statistics as shown in the Appendix.
* The outcome and regressors are listed once and reused by both commands.
local appendix_vars tip_frac lux snow prcp tavgdegf tavgdegfsq distance ride_duration ///
    passenger_count vendor menu_tip rushhour weekday

* Pairwise correlations (with significance levels); month is included here only
pwcorr `appendix_vars' month, sig
* Descriptive statistics
summarize `appendix_vars'

* Figure 1: Hourly variation in Lux (box plot by hour; outside values not drawn)
graph box etrwhm2lux, over(hour) nooutsides

* Figure 2: Association between tip fraction and sunlight
* The ride-level data are set aside and brought back once the graph is drawn.
preserve
* One row per lux category: mean, standard deviation and non-missing count of tip_frac
collapse (mean) tip_frac (sd) sdtip_frac = tip_frac (count) n = tip_frac, by(lux)
* Half-width of the 95% confidence interval for the mean (t distribution, n-1 d.f.);
* held in double precision so the bounds equal the inline computation exactly
tempvar ci_halfwidth
generate double `ci_halfwidth' = invttail(n - 1, 0.025) * (sdtip_frac / sqrt(n))
generate hitip_frac = tip_frac + `ci_halfwidth'
generate lowtip_frac = tip_frac - `ci_halfwidth'
* Mean tip fraction by lux category, with capped spikes for the confidence interval
twoway (line tip_frac lux) (rcap hitip_frac lowtip_frac lux)
restore

* Table 1 = Main results
* All four specifications regress tip_frac on lux plus the same controls and cluster
* standard errors on hourday; they differ only in the fixed effects added.
* reghdfe is a community-contributed command and must be installed beforehand.
local t1_controls snow prcp tavgdegf tavgdegfsq ride_duration distance passenger_count ///
    vendor menu_tip rushhour weekday

* (1) OLS, no fixed effects
regress tip_frac lux `t1_controls', vce(cluster hourday)

* (2) driver fixed effects
reghdfe tip_frac lux `t1_controls', absorb(driver_id) vce(cluster hourday)

* (3) driver and pkp_day fixed effects
reghdfe tip_frac lux `t1_controls', absorb(driver_id pkp_day) vce(cluster hourday)

* (4) driver and pkp_day fixed effects plus month indicators
reghdfe tip_frac lux `t1_controls' i.month, absorb(driver_id pkp_day) vce(cluster hourday)


* Robustness test #1 (in footnote): alternate measures of sunniness.
* Re-estimates the full specification (driver and pkp_day fixed effects, month
* indicators, SEs clustered on hourday) with the categorical lux variable replaced by
* the continuous lux measure (etrwhm2lux) and then by the unconverted etrwhm2.
foreach sun_measure in etrwhm2lux etrwhm2 {
    reghdfe tip_frac `sun_measure' snow prcp tavgdegf tavgdegfsq ride_duration distance ///
        passenger_count vendor menu_tip rushhour weekday i.month, ///
        absorb(driver_id pkp_day) vce(cluster hourday)
}

* Robustness test #2 (in footnote): 4 random samples.
* For each seed, in order: set the full data aside, report the observation count,
* seed the random-number generator, keep a 5% random sample, report the new count,
* re-estimate the full specification, then bring the full data back.
foreach draw_seed in 1234567 2345678 3456789 4567890 {
    preserve
    count
    set seed `draw_seed'
    sample 5
    count
    reghdfe tip_frac lux snow prcp tavgdegf tavgdegfsq ride_duration distance ///
        passenger_count vendor menu_tip rushhour weekday i.month, ///
        absorb(driver_id pkp_day) vce(cluster hourday)
    restore
}

